import com.yeejoin.precontrol.common.fileparser.product.html.WordHtml;
import com.yeejoin.precontrol.common.fileparser.utils.FileHelper;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.AltChunkType;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * @description:
 * @author: duanwei
 * @date: 2020-07-14 19:04
 **/

@Slf4j
public class htmlToDoc {
    public static void main(String[] args) throws Exception {
        getHtmlByDoc();
        dataClear();
        html2doc(new File("D:\\opt\\test1.doc"), FileUtils.readFileToString(new File("D:\\opt\\test.html"), "UTF-8"));
    }


    /**
     * 1.根据文件生成html
     *
     * @throws Exception
     */
    public static void getHtmlByDoc() throws Exception {
        WordHtml wordHtml = new WordHtml();
        wordHtml.createHtml("D:\\opt\\test.doc", "D:\\opt\\test");
    }

    /**
     * 2.处理数据格式
     */
    public static void dataClear() {
        StringBuffer stringBuffer = FileHelper.readFile("D:\\opt\\test.html");
        String text = closeHTML(stringBuffer.toString());
        try {
            FileWriter fw = new FileWriter(new File("D:\\opt\\test.html"), false);
            BufferedWriter bw = new BufferedWriter(fw);
            bw.write(text);
            bw.flush();
            bw.close();
            fw.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /**
     * html转doc
     *
     * @param file 输出doc文件
     * @param html html文本内容
     * @throws Exception
     */
    public static void html2doc(File file, String html) {
        log.info("开始html--->doc");
        try {
            WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();
            String table = "<style type=\"text/css\"> table td{border:1px solid #000000} </style>";
            String htmlContent = "<html><head><title></title>" + table + "</head><body>" + html + "</body></html>";
            htmlContent = processDocStyle(htmlContent);
            wordMLPackage.getMainDocumentPart().addAltChunk(AltChunkType.Html, htmlContent.getBytes());
            wordMLPackage.save(file);
        } catch (Docx4JException e) {
            e.printStackTrace();
            log.error("html转doc出错:{}", e.getMessage());
        }
        log.info("转换完成html--->doc");

    }

    /**
     * 处理 doc 样式
     *
     * @param htmlContent
     * @return
     */
    private static String processDocStyle(String htmlContent) {
        Document doc = Jsoup.parse(htmlContent);
        doc.select("table").attr("cellspacing", "0px")
                .attr("cellpadding", "0px").attr("border-collapse", "collapse");
        return doc.outerHtml();
    }


    public static String closeHTML(String str) {
        List arrTags = new ArrayList();
        arrTags.add("br");
        arrTags.add("hr");
        arrTags.add("img");
        arrTags.add("meta");
        arrTags.add("META");
        for (int i = 0; i < arrTags.size(); i++) {
            for (int j = 0; j < str.length(); ) {
                int tagStart = str.indexOf("<" + arrTags.get(i), j);
                if (tagStart >= 0) {
                    int tagEnd = str.indexOf(">", tagStart);
                    j = tagEnd;
                    String preCloseTag = str.substring(tagEnd - 1, tagEnd);
                    if (!"/".equals(preCloseTag)) {
                        String preStr = str.substring(0, tagEnd);
                        String afterStr = str.substring(tagEnd);
                        str = preStr + "/" + afterStr;
                    }
                } else {
                    break;
                }
            }
        }
        return str;
    }


}
